*** Set paths
local dir "..."
local dropbox_data "..."
local dropbox_data_create "..."
local local_data "..."
local dropbox_fig "..."
local dropbox_tab "..."


************************************************************
*** COLLAPSE DATA TO AVOID DOUBLE COUNTING
************************************************************

*Collapse to avoid double counting
collapse IIROC_PRICE quote quote_rel quote_0  share_won_of_dem share_won_of_dem_0 log_shares_H share_ratio_H  share_Ei_H share_E0_H days_post_auction issuesum_tot, by(bidderid  period b_is_d homedealerID homedealer large_trade)

*Average since quotes don't vary - collapse per bidderid and homedealer 
collapse  log_shares_H share_Ei_H share_won_of_dem quote quote_rel  , by(period bidderid homedealer large_trade)
 
 
************************************************************
*** IV REGRESSION -INPUT FOR APPENDIX TABLE A6  
************************************************************

*drop benchmark dealer 
drop if bidderid==$basedealer

*to gain sufficient power and avoid zero market share issues, pool the smallest dealers who have few trades on the platform from non-home investors
gen bidderid3= bidderid
replace bidderid3 = 46 if bidderid==61 | bidderid==94 | bidderid==18 

*date and dealer dummies + choose base 
tabulate period, gen(var_)
ren var_1 base1
tabulate bidderid3, gen(bidder_)
ren  bidder_4 base2 // 88 as in the no-homedealer version

*Split loyalty into L and S trades 
gen homedealerL = homedealer * large_trade 
gen homedealerS = homedealer * (1-large_trade)


*Use the code below to save regression output to latex - needs to be uncommented and can't be run together with the rest 
/*
	eststo: reghdfe quote_rel  share_won_of_dem homedealerL homedealerS , absorb(period bidderid3) 
	predict zfs2, xb
	label var zfs2 "rel quote"
	eststo: reghdfe log_shares  quote_rel  homedealerL homedealerS  , absorb(period bidderid3) 
	
	eststo: reghdfe log_shares  zfs2   homedealerL homedealerS , absorb(period bidderid3 ) 
	esttab using  "`dropbox_tab'/IV_reg2_$week _$homedealer _$keepall _$largetrade _$fej _$instrument.tex", wide se ar2 nobaselevels label mtitle("1st"  "OLS"   "IV")  nonumbers  replace 
	eststo clear

	eststo: ivreg2 log_shares var_* bidder_* homedealerL homedealerS  (quote_rel=share_won_of_dem) 
	esttab using  "`dropbox_tab'/IV_reg1_$week _$homedealer _$keepall _$largetrade _$fej _$instrument.tex",  se ar2  keep(quote_rel) wide nobaselevels label  replace 
	eststo clear
*/

	
*label variables
label var share_won_of_dem "share won"
label var quote_rel "rel quote"
replace share_won_of_dem=share_won_of_dem/1000


if($fej ==1){
	
	*stage 1)  X on Z, and predics
	reg quote_rel  share_won_of_dem var_*  bidder_*  homedealerL homedealerS if share_won_of_dem!=. 
	predict zfs, xb
	*stage 2) 
	reg log_shares_H  zfs   var_*   bidder_* homedealerL homedealerS

	*compute sigmahat
	gen alphahat = _b[zfs]
	gen sigma 	 = 1/alphahat
}

if($fej ==0 ){
	*stage 1)  X on Z, and predics
	reg quote_rel  share_won_of_dem var_*    homedealerL homedealerS if share_won_of_dem!=. 
	predict zfs, xb
	*stage 2) 
	reg log_shares_H  zfs   var_*    homedealerL homedealerS
	
	gen alphahat = _b[zfs]
	gen sigma 	 = 1/alphahat
}


*store the fixed effects	
gen fet=. 
ds  var_* 
foreach var in `r(varlist)'{
replace fet =  _b[`var']  if `var'==1 

}
replace fet =0 if base1==1
tab period if fet==0
	*--> should be only one if nothing dropped out due to colinearity


*dealer fixed effect
if($fej ==1){
	gen fej =.
	ds bidder_*
	foreach var in `r(varlist)'{
		replace fej =  _b[`var']  if `var'==1 
	}
}
if($fej ==0){
	gen fej=0
}

	
replace fej =0 if base2==1 
tab bidderid if fej==0
	*---> should be the omitted bidder
			

*get xij
predict resids, residuals
gen double constant =_b[_cons] 

gen delta 		= _b[homedealerS] if large_trade==0
replace delta 	= _b[homedealerL] if large_trade==1

	
************************************************************
*** MERGE BACK WITH ALL DATA
************************************************************
		
keep period bidderid homedealer alphahat sigma  fet fej  resids constant delta large_trade

tempfile tempxi
save  `tempxi' , replace


use `temprest', clear
merge m:1 period bidderid homedealer large_trade using `tempxi'
drop if market_type=="auction"

	 tab bidderid if _merge==1 & trackclient=="market-wide"
		*should be:
			*any trade by the baseline dealer
			*any trade by a dealer that is not selling on that day on CanDeal

	drop _merge

	
*bring back the retail investors			
if ($keepall)==0 {	
	append using "`local_data'/data_base_retailer_2022.dta"
}


	
************************************************************
*** PREPARE TO OUTSHEET FOR MATLAB 
************************************************************

*** A) Go onto the parent level (to obtain sufficient power per day)

*bidder id's
replace bidderid=46 if bidderid==46001
replace bidderid=18 if bidderid==18001
replace bidderid=24 if bidderid==24001

replace homedealerID = 46 if homedealerID==46001
replace homedealerID = 18 if homedealerID==18001
replace homedealerID = 24 if homedealerID==24001

*quote on parent level (filled in)
bys period bidderid IIROC_SIDE trading_venue large_trade: egen a0 = mean(IIROC_YIELD) if trading_venue=="CanDeal" 
bys period bidderid IIROC_SIDE large_trade: egen quote2 = max(a0)
drop a0 
count if quote2==. 
bys bidderid IIROC_SIDE large_trade (period):    replace quote2 = quote2[_n-1] if quote2==.    //this doesn't change much in the regressions 
gen period_neg = -period
bys bidderid IIROC_SIDE large_trade (period_neg): replace quote2 = quote2[_n-1] if quote2==.	
count if quote2==.
replace quote = quote2 

	*check if quote is unique:
	preserve
	collapse IIROC_QUANTITY, by(bidderid IIROC_SIDE period quote large_trade) 
	bys bidderid IIROC_SIDE period: gen a0=_N
	tab a0
	restore 	


*** B) Fill in missing sigma, quote, xij, EP  (where bidder did not trade on the platform)

*fill in sigma  for the baseline dealer
egen a0 = max(sigma)
replace sigma = a0 if sigma==.
drop a0

*fill in fet and constant
bys period: egen a0=max(fet)
replace fet=a0 if fet==.
drop a0 
count if fet==. 

*fill in constant 
egen a0 = max(constant)
replace constant = a0 if constant==.
drop a0 

*fill in delta 
bys large_trade: egen a0=max(delta)
replace delta = a0 if delta==.
drop a0

*fill in fej 
bys bidderid: egen a0=mean(fej) // this was max() before SEP 16, 2022
replace fej = a0 
replace fej=0 if bidderid==$basedealer
drop a0 

*use residuals as non-homedealer 
bys period bidderid homedealer large_trade: egen  a0=max(resids) if homedealer==0
bys period bidderid large_trade: egen  a1=max(a0) 
replace resids=a1
replace resids = 0 if resids==.
drop a0 a1

*fill in xij xijH 
gen double xij  = resids + constant + fej + fet  
gen double xijH = resids + constant + fej +  delta + fet

replace xij = 0 if bidderid==$basedealer
replace xijH = delta if bidderid==$basedealer


*** C) Graphs  	 
preserve 
replace fet = fet + constant 	
collapse xijH xij fet, by(bidderid  year week)
gen netHxij = xijH-xij 
tabstat xij netHxij, stats(count mean p50 min max)
	
graph box xij xijH, over(bidderid)  yti(basis points, size(vlarge) )   graphregion(color(white)) marker(1, msize(vsmall vsmall) msymbol(circle_hollow circle_hollow) mcolor(black%70))   graphregion(color(white)) ylabel(,labsize(large))  bar(1, fcolor(white) lcolor(black)) noout  leg(order(1 2) row(1) label(1 "quality as dealer") label(2 "quality as home dealer")  region(lc(white))) 
graph export "`dropbox_fig'/graph_box_xijs1_$week _$homedealer _$keepall.png", replace
	
graph box xij netHxij, over(bidderid)  yti(basis points, size(vlarge) )   graphregion(color(white)) marker(1, msize(vsmall vsmall) msymbol(circle_hollow circle_hollow) mcolor(black%70))   graphregion(color(white)) ylabel(,labsize(large))  bar(1, fcolor(white) lcolor(black)) noout  leg(order(1 2) row(1) label(1 "dealer quality") label(2 "home dealer benefit")  region(lc(white))) 
graph export "`dropbox_fig'/graph_box_xijs2_$week _$homedealer _$keepall.png", replace
restore 
	

*** D) Create variables and drop few missings to avoid problems when running the matlab code 

*** normalize xij, fej's so that it matches the matlab code (and model as written)
replace xij  = xij *sigma
replace xijH = xijH *sigma
replace fej  = sigma*fej

	*how many quotes are missing when dealers trade OTC?
	count if quote==. 
	count if xij==.
	count if xijH==.
	count if fej==.
	count if fet==.
	tab period if fet==.


*use average theta per day
bys period: egen a0 = mean(midyield_bl)
replace midyield_bl=a0 
drop a0 

*drop empty midyield and xij
drop if midyield_bl==.
drop if IIROC_YIELD==.
count if xij==.	

*create baseline dealer
gen basedealer =  $basedealer

*create date-investortype-side grid for latlab code
drop id
egen id = group(period retailer IIROC_SIDE large_trade)
bys period retailer IIROC_SIDE bidderid large_trade (exec_time): gen countingj = _n
bys period retailer IIROC_SIDE bidderid homedealer large_trade (exec_time): gen countingjH = _n

*create time grid 
gen a0 = exec_time_grid
egen a1 = min(exec_time_grid)
gen time_h = (a0 - a1) /(100*60*60) //hours
generate string_time = string( exec_time_grid,"%tc") 
drop a0 a1 

*make all string variables numeric:
sort bidderid date 
destring bidderid, replace

egen 	lei = group(LEI)
replace lei = 0 if LEI=="client"
replace lei = -999 if LEI=="NA"

gen 	venue2 =1  if trading_venue=="OTC"
replace venue2 = 2 if trading_venue=="CanDeal"

drop side 
gen  side 	 =  1 if IIROC_SIDE=="BUY"
replace side = -1 if IIROC_SIDE=="SELL"

gen retail_dummy = (retailer==1)


	*check that nothing that I need is missing	
	count if IIROC_PRICE==.
	count if IIROC_YIELD==.
	count if quote==.
	count if IIROC_QUANTITY==.

	count if midyield_bl==. 		    
	count if xij ==.

*create a single variable with the benefit of the dealer to the investor
gen xij2 = xij if homedealer==0
replace xij2 = xijH if homedealer==1


*cij (no longer used)
gen ctij = IIROC_YIELD - quote if trading_venue=="OTC"

*EP (no longer used)
gen EP = .

*create weight by the market sizes of the dealers (no longer used)
gen sharei	=0
gen Ei		=0

levelsof side, local(levels_sides) 
foreach l of local levels_sides {

*market share
bys bidderid  period IIROC_SIDE : gen a0 = _N  	if  market_type=="secondary" & side==`l'
replace Ei = a0
drop a0

*shareE_tsi as % of total platform market share (across dealers)
bys period IIROC_SIDE : gen a0 = _N  				if market_type=="secondary"  & side==`l'
replace sharei = Ei/a0 if side==`l'
drop a0
}


*Set to NaN what is no longer used in Matlab (to not get confused in Matlab code)
replace bidyield_can=.
replace askyield_can=.
replace bidyield_bl=.
replace askyield_bl=.

*sort 
sort id exec_time 

browse id period retail_dummy side  midyield_bl large_trade askyield_can  IIROC_YIELD venue2 bidyield_bl askyield_bl  share_Ei_H share_OTCi xij sigma bidderid countingj quote feij ctij  basedealer if trading_venue=="OTC"


************************************************************
*** OUTSHEET FOR MATLAB
************************************************************

outsheet id period retail_dummy side  midyield_bl large_trade askyield_can  IIROC_YIELD venue2 bidyield_bl askyield_bl   share_Ei share_OTCi xij2  sigma bidderid countingj quote feij ctij  basedealer homedealerID xij xijH homedealer countingjH  using "1_input/MatlabEstimationData_$week _$homedealer _$keepall _$quantityrobust _$largetrade _$fej.csv", replace
		  
